Exported from analysis/transportation-rail.ipynb committed by GitHub Action on Sat Feb 19 00:54:11 2022 revision 1, 0e9088e

US Macroeconomic Data Explorer | US Railroad Freight Traffic

US Railroad Freight Traffic


A look at railroad freight volumes, according to reports obtained from BNSF, CSX, Norfolk Southern, and Union Pacific.

In [32]:
# Core dataframe / charting / parsing dependencies.
import pandas as pd
import altair as alt
import numpy as np
import re
from joblib import Memory
from time import sleep
from datetime import date
from IPython.core.display import HTML

# PDF table extraction (tabula) and raw-text extraction (tika).
from tabula import read_pdf
from tika import parser

from urllib.request import urlopen, Request
from urllib.parse import urlencode
from urllib.error import URLError
from http.client import InvalidURL
from json import dumps, loads

# need {unidecode, fake_useragent}

from googleapi.google import search as googleSearch
from urllib.error import HTTPError
from io import BytesIO
from os import environ

# Disk-backed cache so re-running the notebook does not re-download PDFs.
memory = Memory('data/', verbose=0)

# Desktop-browser User-Agent string sent with every request; several of the
# report hosts reject the default urllib agent.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'

# This notebook scrapes live endpoints and needs a human in the loop, so
# refuse to run inside the automated GitHub workflow.
if environ.get("GITHUB_WORKFLOW"):
    raise RuntimeError("Requires manual updates")
In [2]:
%%capture
%%time

@memory.cache
def parseCsxReport(urlAndWeek):
    """Download and parse one CSX weekly volume-trends PDF.

    Parameters
    ----------
    urlAndWeek : tuple(str, str)
        ``(pdf_url, label)`` where the label is either ``'2013 Week 29'``
        or the older ``'...-WK29'`` filename form.

    Returns
    -------
    pandas.DataFrame indexed by cargo category with float columns for the
    weekly, quarter-to-date, and year-to-date figures, or None when the
    label or PDF cannot be parsed (the offending input is printed).
    """
    try:
        url, weekAndYear = urlAndWeek
    except ValueError:
        print(urlAndWeek)
        return None

    # Labels come in two flavors: '2013 Week 29' and '2014-WK20'.
    for sep in (' Week ', '-WK'):
        parts = weekAndYear.split(sep)
        if len(parts) == 2:
            year, week = parts
            break
    else:
        print(urlAndWeek)
        return None

    #            WEEK   28                QUARTER TO DATE           YEAR TO DATE
    baseCols = ['Cargo', 'Week {0} {1}', 'Week {0} {2}', 'Week {0} pct-change',
                'Week {0} {1} QTD', 'Week {0} {2} QTD', 'Week {0} QTD pct-change',
                'Week {0} {1} YTD', 'Week {0} {2} YTD', 'Week {0} YTD pct-change']

    try:
        # week may carry a suffix (e.g. '49 AAR'); keep only the number.
        cols = [c.format(week.split(' ')[0], int(year), int(year)-1) for c in baseCols]
    except ValueError:
        # Non-numeric year in the label.
        print(urlAndWeek)
        return None

    try:
        tmp = read_pdf(url, pages='all', pandas_options={ 'names': cols })
        df = pd.concat(tmp) if isinstance(tmp, list) else tmp
        # Drop the header row, then strip every non-numeric character
        # (thousands separators, footnote marks) so cells become floats.
        return df.iloc[1:, :].set_index('Cargo').applymap(lambda v: float(re.sub('[^0-9.]', '', v)))
    except (AttributeError, ValueError):
        # Unparseable PDF layout or non-numeric cell content.
        print(urlAndWeek)
        return None

#parseCsxReport(('http://csx.gcs-web.com/static-files/f2baf981-9658-41f8-9ec0-400e9fdbde11', '2013 Week 29')).head(30)

#parseCsxReport(('http://csx.gcs-web.com/static-files/2d4f737e-1314-46aa-9e8c-a29b40be2a2b', '2019 Week 49 AAR')).head(30)

def getWeekFrom(u):
    """Extract the one- or two-digit week number embedded in a report URL.

    Raises IndexError when no week marker is present — callers catch that
    to skip non-report links.
    """
    # Raw string so '\d' is a regex digit class, not an invalid escape.
    return re.findall(r"(?:Week-|WK|Week_|w|week)(\d\d?)", u)[0]

#getWeekFrom('https://s2.q4cdn.com/859568992/files/doc_downloads/volume_trends/2014/CSX_AAR-2014-WK20_csx.pdf')

def getUrlsAndWeeks(res):
    """Turn a CSX content-asset listing into (pdf_url, 'YYYY Week NN') pairs.

    Entries whose filename carries no recognizable week marker are skipped.
    """
    entries = []
    for asset in res['GetContentAssetListResult']:
        path = asset['FilePath']

        # 2020/2021 files do not keep the year as a path component, so
        # detect those years from the full path; otherwise take the
        # second-to-last path segment.
        if "2020" in path:
            year = 2020
        elif "2021" in path:
            year = 2021
        else:
            year = path.split("/")[-2]

        try:
            week = getWeekFrom(path)
        except IndexError:
            continue

        entries.append((path, f"{year} Week {week}"))

    return entries

reportListUrl = "https://investors.csx.com/Services/ContentAssetService.svc/GetContentAssetList"

def params(y):
    """Build the POST payload asking the CSX investor-relations content
    service for every 'Volume Trends' asset published in year *y*.

    (Was a lambda bound to a name; a def is the idiomatic form — PEP 8.)
    """
    return {
        "serviceDto": {
            "ViewType": "2",
            "ViewDate": "",
            "RevisionNumber": "1",
            "LanguageId": "1",
            "Signature": "",
            "ItemCount": -1,   # -1 == no paging, return everything
            "StartIndex": 0,
            "TagList": [],
            "IncludeTags": True
        },
        "assetType": "Volume Trends",
        "excludeSelection": 1,
        "year": y
    }

# Request headers: JSON body plus the browser UA defined above.
headers = {
    'User-Agent': uaString,
    'Content-Type': 'application/json; charset=UTF-8'
}

# Collect (pdf url, 'YYYY Week NN') pairs for every report year.
urls = []
for y in range(2014, 2022):
    
    req = Request(reportListUrl, data=dumps(params(y)).encode('ascii'), headers=headers)

    data = urlopen(req)
    
    res_dict = loads(data.read())
    
    urls = urls + getUrlsAndWeeks(res_dict)

# Parse every PDF (joblib-cached) and stack the weeks up as rows.
dfs = map(parseCsxReport, urls)

merged = pd.concat(dfs, axis=1, sort=False).T

#merged.head()

# Keep only rows whose label ends in a digit (filters out oddly-labeled
# variants), then map 'Week NN YYYY' labels to the Saturday (%w == 6) of
# that week; the ' AAR' strip is defensive for labels that carry a suffix.
subset = merged.filter(axis='index', regex='[0-9]$').copy()

subset['dt'] = subset.index.map(lambda v: pd.to_datetime(v.replace(' AAR', '') + ' 6', format='Week %W %Y %w', errors='coerce'))
In [3]:
# Render a "data current as of <today>" stamp in the published notebook.
HTML("<h4><i>(data current as of {})</i></h4>".format(date.today().strftime("%B %d, %Y")))
Out[3]:

(data current as of November 02, 2021)

CSX Freight Reports

In [4]:
def doChart(metric, df, color='grey'):
    """Two-panel interactive Altair chart for one CSX series.

    Upper panel: monthly growth bars vs. 12 months earlier, with negative
    months drawn in salmon. Lower panel: the monthly level line carrying
    an interval brush that zooms the upper panel's x-domain.
    """
    # Resample the weekly observations to calendar-month means.
    pmp = df[['dt', metric]].sort_values('dt').copy()
    pmp2 = pmp.set_index('dt').resample('1M').mean().dropna().reset_index()

    brush = alt.selection(type='interval', encodings=['x'])
    
    base = alt.Chart(pmp2).mark_line(color='purple').encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [carloads]')),
        tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(f"yoy:Q", format=",.02f")]
    ).properties(
        width=750,
        height=450
    )
    
    # lag of 12 monthly rows == same month last year. NOTE(review): the
    # axis label says '52-Week' but the data at this point is monthly.
    upper = base.mark_bar(color=color).transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=12, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
        alt.Y('yoy:Q', axis=alt.Axis(title='52-Week Growth [%]')),
        color=alt.condition("datum['yoy'] < 0",
            alt.value('lightsalmon'),
            alt.value(color)
        ),
        tooltip=[alt.Tooltip('dt:T', format='%B %Y', title='Period'), alt.Tooltip('yoy:Q', format=',.02f')]
    ).properties(
        title=f'CSX Freight Reports: {metric}'
    )
    
    # Small overview strip that owns the brush selection.
    lower = base.properties(
        height=100
    ).add_selection(brush)

    return (upper & lower).properties(
        background='white'
    )
    
    #return c.display()
    
def doYoyChart(df, metric = 'MOTOR VEHICLES', carrier="BNSF", period=52, freq='W', subset=-260, size=2, color='royalblue'):
    """Generic two-panel year-over-year chart (used for BNSF and Norfolk
    Southern series).

    `period` rows of lag define "one year" (52 for weekly, 12 for monthly
    data); `subset` slices the tail of `df` before charting; `size` is
    the bar width. NOTE(review): `freq` is accepted but unused in this
    implementation.
    """
    #yoy = df.set_index('dt').pct_change(period, freq=freq).apply(lambda v: v * 100).sort_index().reset_index().copy()
    
    # .transform_calculate(
    #    val=f"datum['{metric}'] > 100 ? NaN : datum['{metric}']"
    #)
    brush = alt.selection(type='interval', encodings=['x'])
    
    base = alt.Chart(df[subset:]).mark_line(color='purple').encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [carloads]')),
        tooltip=[
            alt.Tooltip("dt:T", format="%b %Y"),
            alt.Tooltip(f"{metric}:Q")
        ]
    ).properties(
        width=750,
        height=450
    )
    
    # Growth bars: lag `period` rows, then percent change vs. that value.
    upper = base.mark_bar(size=size, color=color).transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=period, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
        alt.Y('yoy:Q', axis=alt.Axis(title='Year-over-year Growth [%]'), impute=alt.ImputeParams(method='value', keyvals=[100, 1000])),
        color=alt.condition(f"datum['yoy'] < 0",
            alt.value('lightsalmon'),
            alt.value(color)
        ),
        tooltip=[
            alt.Tooltip('dt:T', format='%b %Y', title=''),
            alt.Tooltip(f'yoy:Q', format=',.0f', title='Year-over-year Growth [%]')
        ]
    ).properties(
        title=f"{carrier} trend for: {metric}"
    )
    
    # Overview strip carrying the zoom brush (shown above the bars here).
    lower = base.properties(
        height=100
    ).add_selection(brush)

    return (lower & upper).properties(
        background='white'
    )
In [5]:
# --- CSX charts: one cell per commodity/metric ---
metric = 'Total Traffic'
doChart(metric, subset, color='royalblue')
Out[5]:
In [6]:
metric = 'Total Carloads'
doChart(metric, subset, color='royalblue')
Out[6]:
In [7]:
metric = 'Total Intermodal'
doChart(metric, subset, color='royalblue')
Out[7]:
In [8]:
doChart('Primary Metal Products', subset, color='darkslategray')
Out[8]:
In [9]:
metric = 'Lumber & Wood Products'
doChart(metric, subset, color='darkolivegreen')
Out[9]:
In [10]:
metric = 'Motor Vehicles and Parts'
doChart(metric, subset, color='darkslategray')
Out[10]:
In [11]:
metric = 'Containers'
doChart(metric, subset, color='royalblue')
Out[11]:
In [12]:
metric = 'Coal'
doChart(metric, subset, color='black')
Out[12]:
In [13]:
metric = 'Chemicals'
doChart(metric, subset, color='darkseagreen')
Out[13]:
In [14]:
metric = 'Food Products'
doChart(metric, subset, color='darkolivegreen')
Out[14]:
In [15]:
metric = 'Grain'
doChart(metric, subset, color='darkseagreen')
Out[15]:
In [16]:
metric = 'Grain Mill Products'
doChart(metric, subset, color='teal')
Out[16]:

Union Pacific Freight Reports

In [33]:
# https://www.up.com/investor/aar-stb_reports/

from subprocess import CalledProcessError

upUrls = []
for y in range(2005, 2015):
    url = "'http://www.up.com/investors/reports/archive.cfm?Year={}'".format(y)
    res = !curl -L $url
    
    pdfs = re.findall('href="([^.]+.pdf)"', "".join(res))
    
    upUrls = upUrls + pdfs

@memory.cache
def parseUnionPacificArchive(u, retries=3):
    """Union Pacific parser for the 2005-2014 archive PDFs.

    Parameters
    ----------
    u : str
        Site-relative PDF path (e.g. '/investors/.../2009/29.pdf').
    retries : int
        Remaining retry budget; transient failures back off exponentially.

    Returns
    -------
    pandas.Series named 'YYYY-WW' (or, for 2009 reports, a DataFrame that
    also carries the 2008 comparison column), or None on failure.
    """
    if retries < 0:
        return None
    
    theUrl = "http://up.com" + u

    # The week number lives in the filename; two layouts exist. Raw
    # strings keep '\d' a regex class instead of an invalid escape.
    week = None
    for pattern in (r".*[0-9]{4}/(\d\d).pdf.*",
                    r".*[0-9]{4}/week(\d\d)_\d\d.pdf.*"):
        match = re.search(pattern, u)
        if match:
            week = match.group(1)
            break
    if week is None:
        print("Failed: " + u)
        return None

    try:
        req = Request(theUrl, data=None, headers={ 'User-Agent': uaString })

        data = urlopen(req)
        
        df_up = read_pdf(BytesIO(data.read()), pages='all', multiple_tables=False)[0]
        originalColumns = df_up.columns.to_list()
        # First column header becomes 'YYYY-WW' (year taken from column 1).
        df_up.columns = [str(originalColumns[1]) + '-' + str(week)] + originalColumns[1:]
        
        year = str(df_up.columns[1])
        
        # Some PDFs parse with a carload count where the year should be.
        if year[:2] in ['5,', '6,', '7,', '8,']:
            print(f"Invalid year for {u}")
            print(originalColumns)
            return None

        if year == '2009':
            # fetch 2008 data from the 2009 previous year column
            tmp = pd.concat([df_up.set_index(df_up.columns[0])[df_up.columns[1]].rename(year + '-' + str(week)),
                             df_up.set_index(df_up.columns[0])[df_up.columns[2]].rename("2008" + '-' + str(week))], axis=1)
        else:
            tmp = df_up.set_index(df_up.columns[0])[df_up.columns[1]].rename(year + '-' + str(week))

        return tmp
    except CalledProcessError:
        # tabula subprocess failure — not worth retrying.
        return None
    except (KeyError, IndexError, URLError):
        # BUG FIX: retry with the original site-relative path `u`; the old
        # code recursed with `theUrl`, double-prefixing 'http://up.com'
        # on every retry so retries could never succeed.
        print("Failed: " + u)
        sleep(2 ** (3 - retries))
        return parseUnionPacificArchive(u, retries-1)
        
#parseUnionPacificArchive(upUrls[1]).head()
In [34]:
%%capture
#%%time

# Parse every archived UP PDF, skipping week-over-week 'changes' files.
dfs_a = []
for u in upUrls:
    if "changes" in u:
        continue
    
    dfs_a.append(parseUnionPacificArchive(u))

df_aone = pd.concat(dfs_a, axis=1).T


# Drop unnamed (empty) columns before coercing values to numbers.
validCols = [c for c in df_aone.columns if c]

df_aone[validCols] = df_aone[validCols].applymap(lambda v: str(v).replace(",", "") if not pd.isnull(v) else v)\
                        .applymap(lambda v: pd.to_numeric(v, errors='coerce'))

# Strip leading commodity codes and normalize abbreviations in headers.
cleanColumn = lambda s: re.sub("^\d+ ", "", str(s)).replace(", N.E.C", "").replace("Prod.", "Products")

df_aone.columns = [cleanColumn(c) for c in df_aone.columns]

#df_aone.columns

# Average duplicate (week, commodity) cells, then pivot back into a
# week-indexed table.
tab = df_aone.T.reset_index().melt(id_vars='index').groupby(['index', 'variable']).agg('mean')\
        .reset_index().pivot(index='variable', columns='index')

tab = tab.droplevel(level=0, axis=1)
tab.index = tab.index.rename('dt')

#tab.head()
In [35]:
%%capture
#%%time

# curl -L 'https://www.up.com/up/investor/aar-stb_reports/2015_carloadings/index.htm'

# Scrape the 2015-2020 UP carloading index pages for PDF links
# (IPython `!curl` shell escape, notebook-only syntax).
upUrls2 = []
for y in range(2015, 2021):
    url = "'https://www.up.com/up/investor/aar-stb_reports/{}_carloadings/index.htm'".format(y)
    res = !curl -L $url
    
    pdfs = re.findall('href="([^.]+.pdf)"', "".join(res))
    
    upUrls2 = upUrls2 + pdfs

# For 2021
# (the 2021 index lives at a differently-cased path)
new_url = "https://www.up.com/investor/aar-stb_reports/2021_Carloads/index.htm"
new_res = !curl -L $new_url
new_pdfs = re.findall('href="([^.]+.pdf)"', "".join(new_res))
upUrls2 = upUrls2 + new_pdfs

@memory.cache
def parseUnionPacificArchive2(url, retries=3):
    """Union Pacific parser for the 2015+ carload report PDFs.

    Parameters
    ----------
    url : str
        Site-relative PDF path.
    retries : int
        Remaining retry budget; transient failures back off exponentially.

    Returns
    -------
    pandas.Series named 'YYYY-WW', or None on failure.
    """
    if retries < 0:
        return None
    
    theUrl2 = "http://up.com" + url

    # The week number appears in one of three filename layouts. Raw
    # strings avoid the invalid '\d' / '\_' escape sequences the old
    # triple-nested try/except carried.
    week = None
    for pattern in (r".*_[\d]{4}-(\d\d?).pdf.*",
                    r".*week_(\d\d?)_[\d]{4}[^.]+.pdf.*",
                    r".*_[\d]{4}_(\d\d?)[^\.]*.pdf.*"):
        match = re.search(pattern, url)
        if match:
            week = match.group(1)
            break
    if week is None:
        print("Failed week: " + url)
        return None

    try:
        req = Request(theUrl2, data=None, headers={ 'User-Agent': uaString })

        data = urlopen(req)
        
        df_up = read_pdf(BytesIO(data.read()), pages='all', multiple_tables=False)[0]
        # Rename the label column to 'YYYY-WW' using the year header,
        # then extract that year's column as a Series under that label.
        label = str(df_up.columns[1]) + '-' + str(week)
        df_up.columns = [label] + df_up.columns.to_list()[1:]

        tmp = df_up.set_index(df_up.columns[0])[df_up.columns[1]].rename(label)

        return tmp
    except IndexError:
        # PDF parsed into an unexpected shape; back off and retry.
        print("Failed index: " + url)
        sleep(2 ** (3 - retries))
        return parseUnionPacificArchive2(url, retries-1)
    except HTTPError:
        print("Failed url: " + url)
        sleep(2 ** (3 - retries))
        return parseUnionPacificArchive2(url, retries-1)
    except InvalidURL:
        # Malformed link scraped from the index page — not retryable.
        print("Failed url: " + url)
        return None
    
# parseUnionPacificArchive2(upUrls2[2]).head()

# Parse the 2015+ PDFs (again skipping 'changes' files).
dfs_atwo = []
for u in upUrls2:
    if "changes" in u:
        continue
    
    dfs_atwo.append(parseUnionPacificArchive2(u))

df_atwo = pd.concat(dfs_atwo, axis=1).T# .head()

# Drop unnamed columns, strip thousands separators, coerce to numeric.
validCols = [c for c in df_atwo.columns if c]

df_atwo[validCols] = df_atwo[validCols].applymap(lambda v: str(v).replace(",", "") if not pd.isnull(v) else v)\
                        .applymap(lambda v: pd.to_numeric(v, errors='coerce'))

#df_atwo.columns
In [36]:
# Stitch the 2005-2014 and 2015+ tables together.
df_unionpac = pd.concat([tab, df_atwo])

# 'YYYY-WW' labels -> the Saturday ('%w' == 6) of that week.
df_unionpac['dt'] = df_unionpac.index.map(lambda v: pd.to_datetime(v+"-6", format="%Y-%W-%w", errors='coerce'))

#df_unionpac.head()

# Weekly-aligned year-over-year growth, in percent.
df_unionpac_yoy = df_unionpac.groupby('dt').agg('mean').resample('1W').nearest().pct_change(52, freq='W').apply(lambda v: v * 100).reset_index()

#df_unionpac_yoy.tail()
In [37]:
def doUpChart(df, metric='Total Carloads', color='purple'):
    """Simple line chart of one Union Pacific metric over time.

    BUG FIX: chart the `df` argument. The original closed over the global
    `df_unionpac`, silently ignoring whatever frame the caller passed
    (harmless at the call sites below, which all pass df_unionpac, but
    wrong for any other caller).
    """
    return alt.Chart(df.reset_index()[['dt', metric]]).mark_line(color=color).encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y('{}:Q'.format(metric))
    ).properties(
        title=f"Union Pacific Railroad: {metric} History",
        width=750,
        height=400
    )
    
doUpChart(df_unionpac, 'Total Carloads')
Out[37]:
In [38]:
def doYoyUpChart(df, metric, color='royalblue', domain=[-30, 30]):
    """Two-panel year-over-year chart for a Union Pacific weekly series.

    Upper panel: YoY growth bars (lag of 52 weekly rows) with the y-axis
    clamped to `domain`; lower panel: overview line carrying the zoom
    brush. NOTE(review): mutable default `domain` is only read, never
    mutated, so sharing the list across calls is harmless.
    """

    brush = alt.selection(type='interval', encodings=['x'])
    
    base = alt.Chart(df).mark_line(color='purple').encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), impute=alt.ImputeParams(value=None)),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [carloads]'), impute=alt.ImputeParams(value=None)),
        tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(f"yoy:Q", format=",.02f")]
    ).properties(
        width=750,
        height=450
    )
    
    # Growth bars: lag 52 weekly rows == same week last year.
    upper = base.mark_bar(size=1.5, color=color).transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=52, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
        alt.Y('yoy:Q', axis=alt.Axis(title='Year-over-year Growth [%]'), scale=alt.Scale(domain=domain)),
        color=alt.condition(f"datum['yoy'] < 0",
            alt.value('lightsalmon'),
            alt.value(color)
        ),
        tooltip=[alt.Tooltip('dt:T', format='%b %Y'), alt.Tooltip(f'yoy:Q', format=',.0f')]
    ).properties(
        title=f"Union Pacific Railroad: {metric} Traffic"
    )
    
    # Overview strip that owns the brush selection.
    lower = base.properties(
        height=100
    ).add_selection(brush)

    return (upper & lower).properties(
        background='white'
    )

# Headline chart; also exported as the page's thumbnail image.
c = doYoyUpChart(df_unionpac, 'Total Carloads')
c.save('transportation-rail.png')

c.display()
In [39]:
# --- Union Pacific charts: one cell per commodity ---
metric = 'Chemicals'
#doUpChart(df_unionpac, metric)

doYoyUpChart(df_unionpac, metric, color='darkseagreen')
Out[39]:
In [40]:
metric = 'Lumber & Wood Products'
#doUpChart(df_unionpac, metric)

doYoyUpChart(df_unionpac, metric, color='forestgreen', domain=[-60, 60])
Out[40]:
In [41]:
metric = 'Iron & Steel Scrap'
#doUpChart(df_unionpac, metric)

doYoyUpChart(df_unionpac, metric, color='slategray', domain=[-80, 80])
Out[41]:
In [42]:
metric = 'Metals & Products'
#doUpChart(df_unionpac, metric)

doYoyUpChart(df_unionpac, metric, color='slategray', domain=[-80, 120])
Out[42]:
In [43]:
metric = 'Motor Vehicles & Equipment'
#doUpChart(df_unionpac, metric)

doYoyUpChart(df_unionpac, metric, color='black', domain=[-85, 125])
Out[43]:
In [44]:
metric = 'Metallic Ores'
#doUpChart(df_unionpac, metric)

doYoyUpChart(df_unionpac, metric, color='darkslategray', domain=[-85, 170])
Out[44]:
In [45]:
metric = 'Nonmetallic Minerals'
#doUpChart(df_unionpac, metric)

doYoyUpChart(df_unionpac, metric, color='gray', domain=[-65, 100])
Out[45]:

~Norfolk Southern Freight Reports~

In [ ]:
%%time

# Section intentionally disabled: the Google-search scraping below no
# longer returns usable results.
raise RuntimeError("Broken")

numPages = 6

"""res2 = []
for page in range(1, numPages+1):
    sleep(1)
    res2 += googleSearch('site:nscorp.com filetype:pdf weekly merchandise car loadings', page)
    #print(len(res2))
"""
    
# Find Norfolk Southern weekly-loadings PDFs via a Google site: search,
# then keep only hits whose description mentions merchandise car loadings.
res2 = googleSearch('site:nscorp.com filetype:pdf weekly loadings', numPages)

links2 = [r.link for r in res2 if 'weekly merchandise' in r.description.lower() and 'car loadings' in r.description.lower()]
#len(links2)
In [ ]:
#%%time

def handle(df_ns, dt):
    """Convert one Norfolk Southern report table into a dated Series.

    Takes the first two columns of `df_ns` (category key, loadings),
    drops the header row, strips thousands separators, and returns the
    numeric loadings as a Series named by the parsed report date `dt`.

    BUG FIX: the original fallback `except` referenced `df_ns2`/`date`,
    which are unbound when the failure happens before their assignment
    (e.g. an unparseable `dt`), turning the real error into a NameError.
    The fallback now only covers the numeric conversion it is meant for.
    """
    date = pd.to_datetime(dt)
    df_ns2 = df_ns.iloc[:, :2].copy()
    df_ns2.columns = ['Key', 'Loadings']
    df_ns2 = df_ns2.dropna().set_index('Key').iloc[1:]

    cleaned = df_ns2['Loadings'].str.replace(",", "")
    try:
        df_ns2[date] = cleaned.apply(pd.to_numeric)
    except Exception:
        # Some reports append trailing text after the count; keep only
        # the leading numeric token.
        df_ns2[date] = cleaned.apply(lambda v: pd.to_numeric(v.split()[0]))

    return df_ns2[date].T
    
#@memory.cache
def parseNorfolkSouthern(url, retries=3):
    """Norfolk Southern weekly-loadings PDF parser.

    Downloads the PDF once, extracts its tables with tabula and the
    report dates ('To: MM-DD-YYYY') from the text layer with tika, then
    returns one dated column per table via handle(); None when parsing
    ultimately fails.
    """
    if retries < 0:
        return None
    
    location = url
    
    req = Request(location, data=None, headers={ 'User-Agent': uaString })

    data = urlopen(req)
    
    byteData = BytesIO(data.read())

    try:
        #print(f"Parsing {url}...")
        df_ns = read_pdf(byteData, pages='all')
    except Exception as e:
        # tabula fails transiently on some PDFs; back off and retry.
        sleep(2 ** (3 - retries))
        return parseNorfolkSouthern(location, retries-1)
    
    try:
        # Re-read the same bytes with tika to pull the report dates out
        # of the raw text.
        byteData.seek(0)
        raw = parser.from_buffer(byteData)
        
        if raw['content']:
            dts = re.findall(".* To: ([\d]{2}-[\d]{2}-[\d]{4}).*", raw['content'])
        else:
            #print(type(df_ns[0]))
            # Fall back to the date embedded in each table's header row.
            dts = [re.search(".* To: ([\d]{2}-[\d]{2}-[\d]{4}).*", df.columns[1]).group(1) for df in df_ns]
        
        return pd.concat([handle(f, t) for f, t in zip(df_ns, dts)], axis=1)
                          
    except AttributeError as e:
        # No date match found in the header — report layout we don't know.
        print(f"Failed on {url}")
        #print(e)
        #print(df_ns.columns[1], df_ns.columns)
        return None
    
# Fetch and merge every discovered report (set() deduplicates links).
dfs_ns = [parseNorfolkSouthern(l) for l in set(links2)]
#for l in list(set(links2)):
#    dfs_ns.append(parseNorfolkSouthern(l))
    
df_ns_merged = pd.concat(dfs_ns, axis=1).T

# df_ns_merged.to_csv("norfolk_southern.csv", sep='|')

# df_ns_merged = pd.read_csv("norfolk_southern.csv", sep="|")

df_ns_merged = df_ns_merged.applymap(float)

df_ns_merged['dt'] = df_ns_merged.index.map(pd.to_datetime)

# Monthly series aligned to the nearest observation.
df_ns_ts = df_ns_merged.groupby('dt').agg('mean').resample('1M').nearest().reset_index()
In [ ]:
doYoyChart(df_ns_ts, metric='Motor Vehicles and Equipment', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='black')
In [ ]:
doYoyChart(df_ns_ts, metric='Lumber and Wood Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='forestgreen')
In [ ]:
doYoyChart(df_ns_ts, metric='Pulp, Paper and Allied Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='forestgreen')
In [ ]:
doYoyChart(df_ns_ts, metric='Primary Forest Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='forestgreen')
In [ ]:
doYoyChart(df_ns_ts, metric='Chemicals', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='darkseagreen')
In [ ]:
doYoyChart(df_ns_ts, metric='Iron and Steel Scrap', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='grey')
In [ ]:
doYoyChart(df_ns_ts, metric='Total Carloadings', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7)
In [ ]:
doYoyChart(df_ns_ts, metric='Total Intermodal', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7)
In [ ]:
doYoyChart(df_ns_ts, metric='Metals and Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='grey')
In [ ]:
doYoyChart(df_ns_ts, metric='Metallic Ores', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='black')
In [ ]:
doYoyChart(df_ns_ts, metric='Container', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7)
In [ ]:
# https://www.bnsf.com/about-bnsf/financial-information/index.html#Weekly+Carload+Reports

# Find BNSF weekly carload PDFs via a paged Google site: search;
# sleeps 60s between pages to avoid being rate-limited.
numPages = 11
#"""
res = []
for page in range(1, numPages+1):
    sleep(60)
    res += googleSearch("site:bnsf.com filetype:pdf weekly imodcarload", first_page=page, pages=numPages)
#"""

#res = googleSearch("site:bnsf.com filetype:pdf weekly imodcarload", pages=numPages)

links = [r.link for r in res if 'weekly' in r.name.lower() and 'imodcarload' in r.name.lower()]
In [ ]:
links
In [ ]:
#%%time

# NOTE(review): redundant re-assignment; identical to the module-level
# uaString defined near the top of the notebook.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'

#@memory.cache
def parseBnsf(link, retries=3):
    """BNSF weekly carload/intermodal PDF parser.

    Extracts the current-week column (index 2) and the year-ago column
    (index 5, dated 52 weeks earlier) and returns them concatenated, or
    None once the retry budget is exhausted.
    """
    if retries < 0:
        # BUG FIX: the original did `print(e)` here, but `e` is never
        # bound in this frame, so exhausting retries raised NameError.
        print("Failed " + link)
        return None
    
    location = link
    
    try:
        # The report date is the PDF's basename.
        dt = location.split("/")[-1].split(".")[0]
        
        req = Request(location, data=None, headers={ 'User-Agent': uaString })

        data = urlopen(req)

        df = read_pdf(BytesIO(data.read()), pages='all')[0]

        # Column 2: current-week counts, tagged with the report date and
        # the source URL.
        df_t = df.set_index(df.columns[0]).iloc[:, 2]
        df_t = df_t.dropna().str.replace(",", "").apply(pd.to_numeric)
        df_t = df_t.T
        df_t['dt'] = pd.to_datetime(dt)
        df_t['source'] = df_t.index.map(lambda v: location) 

        # Column 5: the same week one year (52 weeks) earlier.
        df_to = df.set_index(df.columns[0]).iloc[:, 5]
        df_to = df_to.dropna().str.replace(",", "").apply(pd.to_numeric)
        df_to = df_to.T
        df_to['dt'] = pd.to_datetime(dt) - pd.DateOffset(weeks=52)
        
        return pd.concat([df_t, df_to], axis=1)
    except Exception:
        # Deliberately broad: network and tabula failures both trigger an
        # exponential backoff and retry.
        sleep(2 ** (3 - retries))
        return parseBnsf(location, retries-1)  
        
dfs = [parseBnsf(l) for l in set(links)]
In [ ]:
# Merge all BNSF report columns; the index column becomes 'Total Intermodal'.
df_bnsf = pd.concat(dfs, axis=1).T.reset_index() #.shape
df_bnsf.columns = ['Total Intermodal'] + df_bnsf.columns[1:].to_list()

# Ignore the per-row 'source' bookkeeping columns.
use = [c for c in df_bnsf.columns if "source" not in c]

df_bnsf['Total Intermodal'] = df_bnsf['Total Intermodal'].str.replace(",", "").apply(pd.to_numeric)

# df_bnfs.to_csv("bnsf.csv", sep="|")

#df_bnsf = pd.read_csv('bnsf.csv', sep="|")

df_bnsf['dt'] = df_bnsf['dt'].apply(pd.to_datetime)

df_bnsf_ts = df_bnsf[use].groupby('dt').agg('mean').applymap(float).resample('1M').nearest().reset_index()

# Long format -> monthly means -> wide month-indexed table.
df_melted = df_bnsf[use].melt(id_vars='dt')
df_melted['value'] = df_melted['value'].apply(float)

df_melted = df_melted.groupby(['dt', 'variable']).agg('mean').reset_index() #.set_index('dt')

df_tab = df_melted.pivot_table(index='dt', columns='variable').resample('1M').mean()

# NOTE(review): this overwrites the df_bnsf_ts computed just above;
# the pivot_table variant is the one the charts use.
df_bnsf_ts = df_tab.droplevel(0, axis=1).reset_index()

~BNSF Freight Reports~

In [ ]:
# --- BNSF charts: one cell per commodity/metric ---
doChart(df=df_bnsf_ts, metric='Total Intermodal', color='slategray')
In [ ]:
doYoyChart(df_bnsf_ts, metric='LUMBER/WOOD', period=12, freq='M', color='forestgreen')
In [ ]:
doYoyChart(df_bnsf_ts, metric='PULP/PAPER', period=12, freq='M', color='forestgreen', size=7)
In [ ]:
doYoyChart(df_bnsf_ts, metric='FOREST PRODUCTS', period=12, freq='M', color='forestgreen', size=7)
In [ ]:
doYoyChart(df_bnsf_ts, metric='CHEMICALS', period=12, freq='M', color='darkseagreen', size=7)
In [ ]:
doYoyChart(df_bnsf_ts, metric='FOOD', period=12, freq='M', color='green', size=7)
In [ ]:
doYoyChart(df_bnsf_ts, metric='Total Intermodal', period=12, freq='M', size=7)
In [ ]:
doYoyChart(df_bnsf_ts, metric='IRON & STEEL SCRAP', period=12, freq='M', size=7, color='slategrey')
In [ ]:
doYoyChart(df_bnsf_ts, metric='METALS', period=12, freq='M', size=7, color='black')
In [ ]:
doYoyChart(df_bnsf_ts, metric='COAL', period=12, freq='M', size=7, color='black')
In [ ]:
doYoyChart(df_bnsf_ts, metric='GRAIN', period=12, freq='M', size=7, color='goldenrod')

© kdunn926